<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="utf-8">
<title>Reducing Words to Their Root Form | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="languages.html" title="Dealing with Human Language">
<link rel="prev" href="sorting-collations.html" title="Sorting and Collations">
<link rel="next" href="algorithmic-stemmers.html" title="Algorithmic Stemmers">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- The first script assigns an empty array to the global dataLayer. The noscript iframe loads ns.html for container GTM-58RLH5 and is only rendered when scripting is disabled. -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <!-- Loader: pushes a 'gtm.start' event carrying the current timestamp onto window.dataLayer, then creates an async script element for gtm.js (container GTM-58RLH5) and inserts it before the first script element in the document. -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <!-- The inline script reuses window.dataLayer when it already exists (otherwise creates an empty array), defines gtag() to push its raw arguments object onto dataLayer, and then queues a 'js' call with the current Date and a 'config' call for property UA-12395217-16. -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <!--
      Overview of the inline script below. It defines a constructor taking four arguments
      (sampling value, mode, cookie name, intercept script URL) with these methods:
        get(name)        looks up a cookie by name in document.cookie; returns its value, or null if absent.
        set(name, value) writes the cookie with path "/" and an expiry 6048E5 ms (7 days) from now.
        check()          reads the cookie named by the third argument and returns whether the intercept
                         script should load; when no cookie exists and the sampling value is 100
                         (as passed here) it returns true without writing a cookie.
        go()             when check() is true, appends a script element whose src is the fourth argument
                         to document.body (if document.body exists).
        start()          registers go() for the window "load" event, falling back to attachEvent
                         when addEventListener is unavailable.
      The last statement constructs the object with (100, "r", cookie name, URL) and calls start();
      any exception thrown there is swallowed by the empty catch.
    -->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-node">Reducing Words to Their Root Form</span>
</div>
<div class="navheader">
<span class="prev">
<a href="sorting-collations.html">« Sorting and Collations</a>
</span>
<span class="next">
<a href="algorithmic-stemmers.html">Algorithmic Stemmers »</a>
</span>
</div>
<div class="chapter">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="stemming"></a>Reducing Words to Their Root Form<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/00_Intro.asciidoc">edit</a>
</h2>
</div></div></div>
<p>Most languages of the world are <em>inflected</em>, meaning that words can change
their form to express differences in the following:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<em>Number</em>:      fox, foxes
</li>
<li class="listitem">
<em>Tense</em>:       pay, paid, paying
</li>
<li class="listitem">
<em>Gender</em>:      waiter, waitress
</li>
<li class="listitem">
<em>Person</em>:      hear, hears
</li>
<li class="listitem">
<em>Case</em>:        I, me, my
</li>
<li class="listitem">
<em>Aspect</em>:      ate, eaten
</li>
<li class="listitem">
<em>Mood</em>:        so be it, were it so
</li>
</ul>
</div>
<p>While inflection aids expressivity, it interferes with retrievability, as a
single root <em>word sense</em> (or meaning) may be represented by many different
sequences of letters. English is a weakly inflected language (you could
ignore inflections and still get reasonable search results), but some other
languages are highly inflected and need extra work in order to achieve
high-quality search results.</p>
<p><em>Stemming</em> attempts to remove the differences between inflected forms of a
word, in order to reduce each word to its root form. For instance, <code class="literal">foxes</code> may
be reduced to the root <code class="literal">fox</code>, to remove the difference between singular and
plural in the same way that we removed the difference between lowercase and
uppercase.</p>
<p>The root form of a word may not even be a real word. The words <code class="literal">jumping</code> and
<code class="literal">jumpiness</code> may both be stemmed to <code class="literal">jumpi</code>. It doesn’t matter—​as long as
the same terms are produced at index time and at search time, search will just
work.</p>
<p>If stemming were easy, there would be only one implementation. Unfortunately,
stemming is an inexact science that suffers from two issues: understemming
and overstemming.</p>
<p><em>Understemming</em> is the failure to reduce words with the same meaning to the same
root. For example, <code class="literal">jumped</code> and <code class="literal">jumps</code> may be reduced to <code class="literal">jump</code>, while
<code class="literal">jumping</code> may be reduced to <code class="literal">jumpi</code>.  Understemming reduces retrieval;
relevant documents are not returned.</p>
<p><em>Overstemming</em> is the failure to keep two words with distinct meanings separate.
For instance, <code class="literal">general</code> and <code class="literal">generate</code> may both be stemmed to <code class="literal">gener</code>.
Overstemming reduces precision: irrelevant documents are returned when they
shouldn’t be.</p>
<div class="sidebar">
<div class="titlepage"><div><div>
<p class="title"><strong>Lemmatization</strong></p>
</div></div></div>
<p>A <em>lemma</em> is the canonical, or dictionary, form of a set of related words—​the
lemma of <code class="literal">paying</code>, <code class="literal">paid</code>, and <code class="literal">pays</code> is <code class="literal">pay</code>.  Usually the lemma resembles
the words it is related to, but sometimes it doesn’t—the lemma of <code class="literal">is</code>,
<code class="literal">was</code>, <code class="literal">am</code>, and <code class="literal">being</code> is <code class="literal">be</code>.</p>
<p>Lemmatization, like stemming, tries to group related words, but it goes one
step further than stemming in that it tries to group words by their <em>word
sense</em>, or meaning.  The same word may represent two meanings—for example, <em>wake</em> can mean <em>to wake up</em> or <em>a funeral</em>.  While lemmatization would
try to distinguish these two word senses, stemming would incorrectly conflate
them.</p>
<p>Lemmatization is a much more complicated and expensive process that needs to
understand the context in which words appear in order to make decisions
about what they mean. In practice, stemming appears to be just as effective
as lemmatization, but with a much lower cost.</p>
</div>
<p>First we will discuss the two classes of stemmers available in Elasticsearch—<a class="xref" href="algorithmic-stemmers.html" title="Algorithmic Stemmers">Algorithmic Stemmers</a> and <a class="xref" href="dictionary-stemmers.html" title="Dictionary Stemmers">Dictionary Stemmers</a>—and then look at how to
choose the right stemmer for your needs in <a class="xref" href="choosing-a-stemmer.html" title="Choosing a Stemmer">Choosing a Stemmer</a>.  Finally,
we will discuss options for tailoring stemming in <a class="xref" href="controlling-stemming.html" title="Controlling Stemming">Controlling Stemming</a> and
<a class="xref" href="stemming-in-situ.html" title="Stemming in situ">Stemming in situ</a>.</p>






</div>
<div class="navfooter">
<span class="prev">
<a href="sorting-collations.html">« Sorting and Collations</a>
</span>
<span class="next">
<a href="algorithmic-stemmers.html">Algorithmic Stemmers »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<!-- Sets the global window.initial_state to an empty object. NOTE(review): presumably read by /guide/static/docs.js; confirm against that script. -->
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
